# SETWD ===========================
rm(list=ls())


# LIBRARIES ===========================

library(tidyverse)
library(foreign)
library(stargazer)
library(lmtest)
library(multiwayvcov)

# IMPORT DATA ===========================

dfgi <- read.csv("data/sve-08_04_24.csv",stringsAsFactors = F)

# NON-ECONOMIC FACTORS ===========================

## FIG 4. Logit Regression Predicting Erosion ----

temp <- dfgi%>%
  filter(!is.na(gini_disp),!is.na(gdppc),!is.na(prs.ge),!is.na(v2cacamps),
         !is.na(democracy_duration))

temp <- temp%>%
  mutate(gini.stan=scale(gini_disp),
         gdp.stan=scale(log(gdppc)),
         capacity.stan=scale(prs.ge),
         plz.stan=scale(v2cacamps),
         age.stan=scale(democracy_duration),
         year.stan=scale(year))

m5 <- glm(erosion.strict ~ gini.stan + gdp.stan + year.stan + capacity.stan + 
            age.stan + plz.stan + region, 
          data=temp, family=binomial)
crse5 <- coeftest(m5, cluster.vcov(m5, temp$country.name))
crse5

var.name <- c("Gini","Logged GDP per capita","Year","State Capacity",
              "Age of Democracy","Polarization")
beta <- crse5[2:7,1]
se <- crse5[2:7,2]
sig <- as.numeric(crse5[2:7,4]<0.05)
res <- data.frame(var.name,beta,se,sig)%>%
  mutate(var.num=c(1,2,3,6,5,4))%>%
  mutate(vf=as.factor(var.name))%>%
  mutate(vf=fct_reorder(vf, -var.num))

res.allcontrols <- res

res.allcontrols%>%
  ggplot(aes(x=beta,y=vf,shape=as.factor(sig)))+
  geom_vline(xintercept=0,color="gray75")+
  geom_errorbarh(aes(xmin=beta-1.96*se,xmax=beta+1.96*se),height=0)+
  geom_point(fill="white")+
  labs(x="Standardized Coefficient",y="")+
  scale_shape_manual(values=c(21,19))+
  theme_bw()+
  theme(legend.position = "none",panel.grid = element_blank())

ggsave("figures/tab2-coefplot.png",width=4.5,height=2.15)



## AUC estimates ----

auc <- function(temp){
  pos.scores <- temp%>%
    filter(!is.na(pred1))%>%
    filter(erosion.strict==1)%>%
    pull(pred1)
  
  neg.scores <- temp%>%
    filter(!is.na(pred1))%>%
    filter(erosion.strict==0)%>%
    pull(pred1)
  
  # estimate AUC
  set.seed(135102)
  reps <- 1000000
  mean(sample(pos.scores,reps,replace=T) > sample(neg.scores,reps,replace=T))
}


temp <- dfgi%>%
  filter(!is.na(v2cacamps),!is.na(gini_disp))

m1 <- glm(erosion.strict ~ v2cacamps + year, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.8304

m1 <- glm(erosion.strict ~ v2cacamps + year + gini_disp, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.8785



